{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "57499cf2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "from dotenv import load_dotenv\n",
    "from IPython.display import Markdown, display, update_display\n",
    "from scraper import fetch_website_links, fetch_website_contents\n",
    "from openai import OpenAI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "310a13f3",
   "metadata": {},
   "outputs": [],
   "source": [
    "load_dotenv(override=True)\n",
    "api_key = os.getenv('OPENAI_API_KEY')\n",
    "\n",
    "# Fail fast with a clear message instead of an opaque auth error on the first API call\n",
    "if not api_key:\n",
    "    raise ValueError(\"OPENAI_API_KEY not found - add it to your .env file\")\n",
    "\n",
    "# Pass the key explicitly so the value loaded above is actually used\n",
    "client = OpenAI(api_key=api_key)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "79226a7f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# System prompt for the link-analysis step; it forces a JSON-only reply\n",
    "# that get_relevant_links() parses with json.loads\n",
    "link_analyzer_prompt = \"\"\"\n",
    "You are a skilled research analyst. Your task is to identify the most useful introductory links for a given topic from a list of URLs. \n",
    "You must ignore forum posts, product pages, and social media links. Focus on high-quality articles, documentation, and educational resources.\n",
    "Respond ONLY with a JSON object in the following format:\n",
    "{\n",
    "    \"links\": [\n",
    "        {\"type\": \"overview_article\", \"url\": \"https://...\"},\n",
    "        {\"type\": \"technical_docs\", \"url\": \"https://...\"},\n",
    "        {\"type\": \"history_summary\", \"url\": \"https://...\"}\n",
    "    ]\n",
    "}\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "73d02b52",
   "metadata": {},
   "outputs": [],
   "source": [
    "# System prompt for the synthesis step; the {topic} placeholder is filled\n",
    "# via .format() in create_research_brief before the request is sent\n",
    "briefing_prompt = \"\"\"\n",
    "You are an expert intelligence analyst. You will be given raw text from several articles about a topic. \n",
    "Your mission is to synthesize this information into a clear and structured research brief. \n",
    "The brief must contain the following sections in Markdown:\n",
    "\n",
    "Research Brief: {topic}\n",
    "\n",
    "1. Executive Summary\n",
    "(A one-paragraph overview of the entire topic.)\n",
    "\n",
    "2. Key Concepts\n",
    "(Use bullet points to list and explain the most important terms and ideas.)\n",
    "\n",
    "3. Important Figures / Events\n",
    "(List the key people, organizations, or historical events relevant to the topic.)\n",
    "\n",
    "4. Further Reading\n",
    "(Provide a list of the original URLs you analyzed for deeper study.)\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "ab04efb6",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_relevant_links(topic: str, starting_url: str, model: str = \"gpt-4o-mini\") -> dict:\n",
    "    \"\"\"Ask the LLM to pick the most useful links for a research brief.\n",
    "\n",
    "    Args:\n",
    "        topic: Research topic used to judge link relevance.\n",
    "        starting_url: Page whose outbound links are analyzed.\n",
    "        model: Chat model to use (defaults to \"gpt-4o-mini\").\n",
    "\n",
    "    Returns:\n",
    "        Parsed JSON dict of the form {\"links\": [{\"type\": ..., \"url\": ...}]}.\n",
    "    \"\"\"\n",
    "    # gather every link found on the starting page\n",
    "    links_on_page = fetch_website_links(starting_url)\n",
    "\n",
    "    user_prompt = f\"\"\"\n",
    "    Please analyze the following links related to the topic \"{topic}\" and return the most relevant ones for a research brief.\n",
    "    The main URL is {starting_url}. Make sure all returned URLs are absolute.\n",
    "\n",
    "    Links:\n",
    "    {\"\\n\".join(links_on_page)}\n",
    "    \"\"\"\n",
    "\n",
    "    # response_format=json_object guarantees the reply is parseable JSON\n",
    "    response = client.chat.completions.create(\n",
    "        model=model,\n",
    "        messages=[\n",
    "            {\"role\": \"system\", \"content\": link_analyzer_prompt},\n",
    "            {\"role\": \"user\", \"content\": user_prompt}\n",
    "        ],\n",
    "        response_format={\"type\": \"json_object\"}\n",
    "    )\n",
    "\n",
    "    return json.loads(response.choices[0].message.content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "ef6ef363",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_all_content(links_data: dict) -> str:\n",
    "    all_content = \"\"\n",
    "    original_urls = []\n",
    "\n",
    "    for link in links_data.get(\"links\", []):\n",
    "        url = link.get(\"url\")\n",
    "        if url:\n",
    "            original_urls.append(url)\n",
    "            content = fetch_website_contents(url)\n",
    "            all_content += f\"Content from {url} \\n{content}\\n\\n\"\n",
    "    \n",
    "    all_content += f\"Original URLs for Reference\\n\" + \"\\n\".join(original_urls)\n",
    "    return all_content"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "c2020492",
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_research_brief(topic: str, starting_url: str, max_content_chars: int = 15000):\n",
    "    \"\"\"Build and stream a Markdown research brief for a topic.\n",
    "\n",
    "    Args:\n",
    "        topic: Subject of the brief.\n",
    "        starting_url: Seed page whose links supply the source material.\n",
    "        max_content_chars: Cap on scraped characters sent to the model,\n",
    "            keeping the request within the context window.\n",
    "\n",
    "    Returns:\n",
    "        The full Markdown brief as a string.\n",
    "    \"\"\"\n",
    "    relevant_links = get_relevant_links(topic, starting_url)\n",
    "    full_content = get_all_content(relevant_links)\n",
    "\n",
    "    user_prompt = f\"\"\"\n",
    "    Please create a research brief on the topic \"{topic}\" using the following content.\n",
    "    Remember to include the original URLs in the 'Further Reading' section.\n",
    "\n",
    "    Content:\n",
    "    {full_content[:max_content_chars]}\n",
    "    \"\"\"\n",
    "\n",
    "    stream = client.chat.completions.create(\n",
    "        model=\"gpt-4o-mini\",\n",
    "        messages=[\n",
    "            {\"role\": \"system\", \"content\": briefing_prompt.format(topic=topic)},\n",
    "            {\"role\": \"user\", \"content\": user_prompt}\n",
    "        ],\n",
    "        stream=True\n",
    "    )\n",
    "\n",
    "    # render the brief incrementally as streamed chunks arrive\n",
    "    response = \"\"\n",
    "    display_handle = display(Markdown(\"\"), display_id=True)\n",
    "    for chunk in stream:\n",
    "        response += chunk.choices[0].delta.content or ''\n",
    "        update_display(Markdown(response), display_id=display_handle.display_id)\n",
    "    # previously the assembled brief was discarded; return it so callers can reuse it\n",
    "    return response"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "594e940c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the full pipeline: pick links from the seed page, scrape them,\n",
    "# then stream the synthesized brief below this cell\n",
    "create_research_brief(\n",
    "    topic=\"The Rise of Artificial Intelligence\", \n",
    "    starting_url=\"https://en.wikipedia.org/wiki/Artificial_intelligence\"\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llm-engineering",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
